{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import time\n",
    "\n",
    "from bs4 import BeautifulSoup\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.common.desired_capabilities import DesiredCapabilities\n",
    "\n",
    "\n",
    "# Open a Chrome session on the remote Selenium server.\n",
    "SELENIUM_HUB_URL = \"http://192.168.50.16:4444/wd/hub\"\n",
    "driver = webdriver.Remote(\n",
    "    command_executor=SELENIUM_HUB_URL,\n",
    "    desired_capabilities=DesiredCapabilities.CHROME,\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Zhihu question page URLs.\n",
    "urls = [\n",
    "    'https://www.zhihu.com/question/429241970',\n",
    "    'https://www.zhihu.com/question/20427672',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Work on the first question in the list.\n",
    "url = urls[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "72\n"
     ]
    }
   ],
   "source": [
    "# Scroll the question page until the end-of-list marker shows up, closing the\n",
    "# sign-in modal whenever it appears while the session is not logged in.\n",
    "is_login = False\n",
    "SCROLL_PAUSE_SECONDS = 2  # pause after each scroll before the page is re-read\n",
    "MAX_SCROLLS = 1000  # safety cap: stop instead of looping forever if the marker never appears\n",
    "#----------------------\n",
    "\n",
    "driver.get(url)\n",
    "login_require = False\n",
    "is_end = False\n",
    "\n",
    "for scroll_count in range(MAX_SCROLLS):\n",
    "    # Scroll to the bottom of the page, then pause.\n",
    "    driver.execute_script(\"window.scrollTo(0, document.body.scrollHeight);\")\n",
    "    time.sleep(SCROLL_PAUSE_SECONDS)\n",
    "\n",
    "    soup = BeautifulSoup(driver.page_source, 'html.parser')\n",
    "    # When not logged in, check whether the sign-in modal popped up and close it.\n",
    "    if not is_login:\n",
    "        login_require = soup.find('svg', attrs={'class': 'Zi Zi--Close Modal-closeIcon', 'fill': 'currentColor'}) is not None\n",
    "        if login_require:\n",
    "            # Anchor on the modal's own classes rather than its position under body,\n",
    "            # which breaks as soon as the page adds or removes a top-level node.\n",
    "            driver.find_element(By.CSS_SELECTOR, 'div.Modal.Modal--default.signFlowModal > button > svg').click()\n",
    "\n",
    "    # Check whether the bottom of the page has been reached.\n",
    "    is_end = soup.find('button', attrs={'type': 'button', 'class': 'Button QuestionAnswers-answerButton Button--blue Button--spread'}) is not None\n",
    "    if is_end:\n",
    "        break\n",
    "else:\n",
    "    print('Gave up after', MAX_SCROLLS, 'scrolls without finding the end-of-page marker')\n",
    "\n",
    "# Parse once more so the final count reflects the page after the last modal close.\n",
    "soup = BeautifulSoup(driver.page_source, 'html.parser')\n",
    "answers = soup.find_all('div', attrs={'class': 'List-item', 'tabindex': '0'})\n",
    "print(len(answers))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "74\n"
     ]
    }
   ],
   "source": [
    "# Check whether some answers are hidden behind the collapsed-answers bar.\n",
    "exist_collapsed_answers = soup.find('div', attrs={'class': 'CollapsedAnswers-bar'})\n",
    "if exist_collapsed_answers:\n",
    "    # Target the bar by its own class; the full path from #root breaks on any layout change.\n",
    "    driver.find_element(By.CSS_SELECTOR, 'div.CollapsedAnswers-bar > button').click()\n",
    "    # Pause before re-reading the page so the click has time to take effect.\n",
    "    time.sleep(2)\n",
    "soup = BeautifulSoup(driver.page_source, 'html.parser')\n",
    "answers = soup.find_all('div', attrs={'class': 'List-item', 'tabindex': '0'})\n",
    "print(len(answers))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'匿名用户3 人赞同了该回答现在的人真可笑，承认别人优秀有这么难？发布于 16 小时前\\u200b赞同 3\\u200b\\u200b14 条评论\\u200b分享\\u200b收藏\\u200b喜欢继续浏览内容知乎发现更大的世界打开Chrome继续'"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Plain text of the third item from the end of answers.\n",
    "answers[-3].get_text()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['https://www.zhihu.com/question/429241970/answer/1564260555', 'https://www.zhihu.com/people/zhi-ye-wen-ku-40']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564270324', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564457945', 'https://www.zhihu.com/people/wen-gu-38-93']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564645536', 'https://www.zhihu.com/people/shen-lin-31']\n",
      "['https://www.zhihu.com/question/429241970/answer/1563887812', 'https://www.zhihu.com/people/serenayu']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564082060', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564247041', 'https://www.zhihu.com/people/ouyangchaoyun']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564653993', 'https://www.zhihu.com/people/stephen-zheng-52']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564343624', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564842988', 'https://www.zhihu.com/people/wang-qing-yang-68-26']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564360535', 'https://www.zhihu.com/people/hua-da-xian-92']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564827573', 'https://www.zhihu.com/people/mars-23-27-22']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564527750', 'https://www.zhihu.com/people/feng-lai-xiang-man-xiu-opt']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564785123', 'https://www.zhihu.com/people/xie-wen-qiang-89']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564849420', 'https://www.zhihu.com/people/cang-bai-de-zheng-yi-47']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564720950', 'https://www.zhihu.com/people/xiong-xiong-68-85-52']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564620002', 'https://www.zhihu.com/people/pang-pang-xiao']\n",
      "['https://www.zhihu.com/question/429241970/answer/1563934913', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564129406', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564769112', 'https://www.zhihu.com/people/annelilyleo']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564722845', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564677983', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564698738', 'https://www.zhihu.com/people/sun-wei-xin-77']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564750961', 'https://www.zhihu.com/people/wanger-42-34']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564722410', 'https://www.zhihu.com/people/zhua-zhua-81-64']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564461048', 'https://www.zhihu.com/people/cys-72-68']\n",
      "['https://www.zhihu.com/question/429241970/answer/1563827849', 'https://www.zhihu.com/people/kidcgy-14']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564763787', 'https://www.zhihu.com/people/wu-xing-78-88']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564494935', 'https://www.zhihu.com/people/fei-zhai-da-ku-qwq']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564711167', 'https://www.zhihu.com/people/bai-57-58']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564267128', 'https://www.zhihu.com/people/se-si-32']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564680002', 'https://www.zhihu.com/people/wang-yue-65-73']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564572971', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564664334', 'https://www.zhihu.com/people/charlary']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564535702', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1563947377', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564827019', 'https://www.zhihu.com/people/unity-73-65']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564291179', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564681854', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1563961196', 'https://www.zhihu.com/people/shao-xu-yan-40']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564675373', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564474503', 'https://www.zhihu.com/people/gui-du-you-39']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564311656', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564724780', 'https://www.zhihu.com/people/zhi-hu-niu-bi-qiu-qiu-ni-bie-feng-wo']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564710868', 'https://www.zhihu.com/people/ji-qing-wei-36']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564163970', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1563992723', 'https://www.zhihu.com/people/hxf-31-45']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564843239', 'https://www.zhihu.com/people/ha-ha-ha-ha-ha-ha-ha-8-63']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564830941', 'https://www.zhihu.com/people/la-jiao-chao-rou-66-91']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564816795', 'https://www.zhihu.com/people/applefine']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564816508', 'https://www.zhihu.com/people/garyxiao-ming']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564805358', 'https://www.zhihu.com/people/gin-3-94']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564797556', 'https://www.zhihu.com/people/feng-yun-yue-yu']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564796026', 'https://www.zhihu.com/people/mao-er-ni-chi-wei-a']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564768956', 'https://www.zhihu.com/people/fa-ai-ban-sheng']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564717498', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564551127', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1563929467', 'https://www.zhihu.com/people/alexis-16-17']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564690158', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564724719', 'https://www.zhihu.com/people/sui-yue-zhi-shi']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564691552', 'https://www.zhihu.com/people/pumpkins-97']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564722665', 'https://www.zhihu.com/people/tong-xie-fen-fei']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564756780', 'https://www.zhihu.com/people/zhang-peng-fei-12-62']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564556600', 'https://www.zhihu.com/people/yang-pei-wen-22']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564533362', 'https://www.zhihu.com/people/xiao-xi-90-42-52']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564482945', 'https://www.zhihu.com/people/ye-lv-su-41']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564753073', 'https://www.zhihu.com/people/kuai-gua-liao']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564759661', 'https://www.zhihu.com/people/ao-lei-li-ya-nuo-56-55']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564790147', 'https://www.zhihu.com/people/hong-chen-yun-lian-pu-ti-gen']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564797204', 'https://www.zhihu.com/people/xiao-fei-77-44']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564731495', 'https://www.zhihu.com/people/sui-guang-57-47']\n",
      "['https://www.zhihu.com/question/429241970/answer/1563995517', 'https://www.zhihu.com/people/']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564710767', 'https://www.zhihu.com/people/zoku_xu']\n",
      "['https://www.zhihu.com/question/429241970/answer/1564445463', 'https://www.zhihu.com/people/wliop']\n"
     ]
    }
   ],
   "source": [
    "# For every answer, collect the content of its itemprop=\"url\" meta tags\n",
    "# (reversed from document order) and keep them in answers_urls.\n",
    "answers_urls = []\n",
    "for answer in answers:\n",
    "    meta_urls = [meta.get('content') for meta in answer.find_all('meta', attrs={'itemprop': 'url'})][::-1]\n",
    "    answers_urls.append(meta_urls)\n",
    "    print(meta_urls)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "在 00 后的人眼里，成年人对他们有哪些错误的认知？ - 知乎\n"
     ]
    }
   ],
   "source": [
    "# Show the title of the page currently loaded in the browser.\n",
    "print(driver.title)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "# Save the browser's current page source to disk, creating the folder if needed.\n",
    "snapshot_path = 'test/zhihu_question_and_answers.html'\n",
    "os.makedirs(os.path.dirname(snapshot_path), exist_ok=True)\n",
    "with open(snapshot_path, 'w', encoding='utf8') as f:\n",
    "    f.write(driver.page_source)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "# Read the saved page snapshot back into html.\n",
    "with open('test/zhihu_question_and_answers.html', encoding='utf8') as f:\n",
    "    html = f.read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse the live page and collect the List-item divs.\n",
    "soup = BeautifulSoup(driver.page_source, 'html.parser')\n",
    "answers = soup.find_all('div', attrs={'class': 'List-item', 'tabindex': '0'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of items currently held in answers.\n",
    "len(answers)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['知乎用户',\n",
       " '38 人赞同了该回答',\n",
       " '嗯。就像80后看90后一样。就是这样～00后只不过是一个标示，时代决定我们接触的东西和上一代人不一样了，但是我们始终可以作为上一代人的一个缩影。60因为接触的思想不同再加上所了解到部分少数案例就觉得70要逆天，同理70觉得80逆天，80觉得90逆天，90嘛 就觉得00逆天了。不要因为接触到部分案例就觉得00要垮了，当初80也这样看90  事实上呢，90的很多能够承担起社会责任。',\n",
       " '发布于 2012-08-17',\n",
       " '\\u200b',\n",
       " '赞同 38',\n",
       " '\\u200b',\n",
       " '\\u200b',\n",
       " '31 条评论',\n",
       " '\\u200b',\n",
       " '分享',\n",
       " '\\u200b',\n",
       " '收藏',\n",
       " '\\u200b',\n",
       " '喜欢',\n",
       " '继续浏览内容',\n",
       " '知乎',\n",
       " '发现更大的世界',\n",
       " '打开',\n",
       " 'Chrome',\n",
       " '继续']"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Every text fragment inside the first answer, in document order.\n",
    "list(answers[0].strings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scroll down to the bottom of the page.\n",
    "driver.execute_script(\"window.scrollTo(0, document.body.scrollHeight);\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse the browser's current page source.\n",
    "soup = BeautifulSoup(driver.page_source, 'html.parser')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "# login_require: the sign-in modal's close icon is present in the parsed page.\n",
    "# is_end: the button used as the bottom-of-page marker is present.\n",
    "login_require = soup.find('svg', attrs={'class': 'Zi Zi--Close Modal-closeIcon', 'fill': 'currentColor'}) is not None\n",
    "is_end = soup.find('button', attrs={'type': 'button', 'class': 'Button QuestionAnswers-answerButton Button--blue Button--spread'}) is not None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True True\n"
     ]
    }
   ],
   "source": [
    "print(login_require, is_end)\n",
    "\n",
    "# Close the sign-in modal when it is showing. The selector is anchored on the\n",
    "# modal's own classes rather than on its position under body.\n",
    "if login_require:\n",
    "    driver.find_element(By.CSS_SELECTOR, 'div.Modal.Modal--default.signFlowModal > button > svg').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the current value of the sign-in modal flag.\n",
    "login_require"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the current value of the bottom-of-page flag.\n",
    "is_end"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the List-item divs from the most recent parse.\n",
    "answers = soup.find_all('div', attrs={'class': 'List-item', 'tabindex': '0'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of items currently held in answers.\n",
    "len(answers)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# End the WebDriver session. quit() closes every window of the session,\n",
    "# so a separate close() beforehand is not needed.\n",
    "driver.quit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
